library(tidyverse)
library(lubridate) 
library(here)
library(kableExtra)
library(broom)
library(janitor)
library(ggridges)
library(patchwork)
library(latex2exp)
library(knitr)
library(modelsummary)
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, fig.width = 6.5)
theme_set(theme_minimal())
all_na <- function(x) any(!is.na(x))
# knitr::opts_chunk$set(fig.pos = "H", out.extra = "") # to control position of figures


apsrcolors2 = c("#646768", "#afd4e0")
apsrcolors2_inv = c("#afd4e0", "#646768")


monthly = read_csv(here("data", "submissions_byyearmonth.csv"))
monthly = monthly %>%
  mutate(year_month = ym(paste0(year, month, sep = "-")))
mean_man = monthly %>%
  filter(year_month >= "2018-01-01" & year_month < "2020-06-01") %>%
  summarize(mean = mean(n))
mean_cur = monthly %>%
  filter(year_month > "2020-05-01" & year_month < "2022-10-01") %>%
  summarize(mean = mean(n))
start_man = as.Date("2018-01-01")
end_man = as.Date("2020-05-30")
start_cur = as.Date("2020-06-01")
end_cur = as.Date("2022-10-01")


# head(monthly)
m = monthly %>%
  filter(year_month >= "2018-01-01" & year_month <= "2022-10-01") %>%
  mutate(announce = if_else(year_month>"2019-07-01", 1, 0), 
         pandemic = if_else(year_month>"2020-03-01", 1, 0), 
         team = if_else(year_month>"2020-05-01", 1, 0), 
         open = if_else(year_month>"2021-08-01", 1, 0), 
         period = announce + pandemic + team + open, 
         period_f = as_factor(period), 
         period_f = fct_recode(period_f, "Reference" = "0", 
                               "Post-announcement" = "1", 
                               "Early pandemic" = "2", 
                               "New team" = "3", 
                               "AY2021-22 start" = "4"))
m1 = lm(n ~ period_f, data = m)
m1_anova = aov(n~period_f, data = m) %>%
  tidy()
m_period = m %>%
  group_by(period_f) %>%
  summarize(mean = mean(n), 
            n = n())
monthly_anova_plot = m %>%
  filter(year_month >= "2018-01-01" & year_month <= "2022-10-01") %>%
  ggplot() +
  geom_rect(aes(xmin=as.Date("2020-05-16"), xmax=as.Date("2020-03-12"), ymin=-Inf, ymax=Inf), fill = "#EEEEEE") + 
  geom_rect(aes(xmin=as.Date("2018-01-01"), xmax=as.Date("2019-07-12"), ymin=-Inf, ymax=Inf), fill = "#EEEEEE") + 
  geom_rect(aes(xmin=as.Date("2021-07-31"), xmax=as.Date("2022-10-01"), ymin=-Inf, ymax=Inf), fill = "#EEEEEE") + 
  geom_line(aes(y=n, x=year_month)) +
  geom_vline(xintercept = as.Date("2020-05-20"), linetype=1) + 
  geom_vline(xintercept = as.Date("2020-03-13"), linetype=2) + 
  geom_vline(xintercept = as.Date("2019-07-12"), linetype=9) + 
  geom_vline(xintercept = as.Date("2021-07-31"), linetype=9) + 
  geom_segment(aes(x=as.Date("2018-01-01"),xend=as.Date("2019-07-12"),y=m_period$mean[1], yend=m_period$mean[1])) +
  geom_segment(aes(x=as.Date("2019-07-12"),xend=as.Date("2020-03-13"),y=m_period$mean[2], yend=m_period$mean[2])) +
  geom_segment(aes(x=as.Date("2020-03-13"),xend=as.Date("2020-05-21"),y=m_period$mean[3], yend=m_period$mean[3])) +
  geom_segment(aes(x=as.Date("2020-05-21"),xend=as.Date("2021-07-31"),y=m_period$mean[4], yend=m_period$mean[4])) +
  geom_segment(aes(x=as.Date("2021-07-31"),xend=as.Date("2022-10-01"),y=m_period$mean[5], yend=m_period$mean[5])) +
  annotate("text", x= as.Date("2018-10-15"), y = 220, label = "Pre-announcement", size = 3) +
  annotate("text", x= as.Date("2019-11-10"), y = 220, label = "Announced", size = 3) +
  annotate("text", x= as.Date("2021-01-01"), y = 220, label = "New team", size = 3) +
  annotate("text", x= as.Date("2022-03-01"), y = 220, label = "AY2021-22+", size = 3) +
  annotate("text", x= as.Date("2018-10-15"), y = 205, label = TeX(paste0("$\\bar{y}=$", round(m_period$mean[1], 1))), size = 3) +
  annotate("text", x= as.Date("2019-11-10"), y = 205, label = TeX(paste0("$\\bar{y}=$", round(m_period$mean[2], 1))), size = 3) +
  annotate("text", x= as.Date("2021-01-15"), y = 35, label = TeX(paste0("$\\bar{y}=$", round(m_period$mean[3], 1))), size = 3) +
  annotate("text", x= as.Date("2021-01-01"), y = 205, label = TeX(paste0("$\\bar{y}=$", round(m_period$mean[4], 1))), size = 3) +
  annotate("text", x= as.Date("2022-03-01"), y = 205, label = TeX(paste0("$\\bar{y}=$", round(m_period$mean[5], 1))), size = 3) +
  geom_curve(aes(x = as.Date("2020-10-01"), y = 40, xend = as.Date("2020-05-01"), yend = 25), curvature = -.3, arrow = arrow(type = "closed", length = unit(0.1, "inches"))) + 
  annotate("text", x= as.Date("2020-12-01"), y = 55, label = "Initial months\nof pandemic", size = 3) +
  labs(y = "Monthy new submissions", x = "Month & year", caption = paste0("One-way ANOVA difference in means across periods: F = ", round(m1_anova$statistic[1], 3), " (p = ", round(m1_anova$p.value[1], 3), ")\nNew submissions were not accepted in August 2019.")) +
  theme_minimal() +
  scale_x_date(date_breaks = "4 months", date_labels = "%m-%Y") +
  theme(axis.text.x=element_text(angle=60, hjust=1), panel.grid.minor = element_blank())
ggsave(here("figures", "monthly_means_anova.tiff"), units = "in", width = 6.6, height = 4, device='tiff', dpi=300)


classification_tab = read_csv(here("data", "classification_tab.csv"))
classification_tab = classification_tab %>%
  mutate(team = as_factor(team))
class_plot = ggplot(classification_tab, aes(fill=team, y=Count, x=Classification)) + 
    geom_bar(position="dodge", stat="identity") +
  geom_text(stat="identity", aes(label=Count), vjust=-1, size = 4, position = position_dodge(width = 1)) +
  scale_fill_manual(values = c(apsrcolors2)) + 
  theme_minimal() +
  theme(legend.position = "none", legend.title = element_blank(), plot.title = element_text(hjust = 0.5), axis.text=element_text(size=11)) +
  labs(x = " ", y = "New submissions", title = "A) Classification" ) +
  expand_limits(y = 500)


methodology_tab = read_csv(here("data", "methodology_tab.csv"))

small_n = methodology_tab %>%
  filter(methodology == "Case study or\nSmall N")
small_n_perc = (small_n$Current - small_n$Mannheim)/small_n$Mannheim*100

icp_n = methodology_tab %>%
  filter(methodology == "Interpretive,\nCritical, or\nPoststructural")
icp_n_perc = (icp_n$Current - icp_n$Mannheim)/icp_n$Mannheim*100

stat_n = methodology_tab %>%
  filter(methodology == "Statistical-\nObservational")
stat_n_perc = (stat_n$Current - stat_n$Mannheim)/stat_n$Mannheim*100
exp_n = methodology_tab %>%
  filter(methodology == "Experimental")
exp_n_perc = (exp_n$Current - exp_n$Mannheim)/exp_n$Mannheim*100

methodology_na = methodology_tab$Mannheim[7] + methodology_tab$Current[7] 


method_plot = methodology_tab %>%
  pivot_longer(cols = c("Mannheim", "Current"), values_to = "Count", names_to = "team") %>%
  filter(!is.na(methodology)) %>%
  ggplot(aes(fill = team, y=Count, x=methodology)) +
  geom_bar(position ="dodge", stat="identity") +
  geom_text(stat="identity", aes(label=Count), vjust=-1, size = 4, position = position_dodge(width = 1)) +
  # geom_text(stat='count', aes(label=..count..), vjust=-1, size = 4, position = position_dodge(width = 1)) +
  scale_fill_manual(values = c(apsrcolors2)) + 
  theme_minimal() +
  theme(legend.position = "bottom", legend.title = element_blank(), axis.text.x = element_text(vjust = 1), plot.title = element_text(hjust = 0.5), axis.text=element_text(size=11)) +
  labs(x = " ", y = "New submissions", title = "B) Epistemological or methodological approach" , caption = paste0("Note: Manuscripts may have zero or multiple classifications.\nExcludes ", methodology_na , " manuscripts with no reported approach.\nJanuary 1, 2018-October 31, 2022.", sep ="")) +
  expand_limits(y = 2000)


class_meth_plot = class_plot + method_plot +
  plot_layout(ncol = 1, guides = "collect") & theme(legend.position = 'bottom')
  
ggsave(here("figures", "submissions_class_method.tiff"), width = 6.5, height = 6.75, units = "in", device='tiff', dpi=300)


corr_author_region3_tab = read_csv(here("data", "corr_author_region3_tab.csv"))

region_na = corr_author_region3_tab %>%
  filter(corr_author_region3 == "N. America")
region_na_perc = (region_na$Current - region_na$Mannheim)/region_na$Mannheim*100
region_eur = corr_author_region3_tab %>%
  filter(corr_author_region3 == "Europe")
region_eur_perc = (region_eur$Current - region_eur$Mannheim)/region_eur$Mannheim*100
region_oth = corr_author_region3_tab %>%
  filter(corr_author_region3 == "Other")
region_oth_perc = (region_oth$Current - region_oth$Mannheim)/region_oth$Mannheim*100


gender = read_csv(here("data", "gender.csv")) %>%
  mutate(team = as_factor(team))
nonbinary = read_csv(here("data", "nonbinary.csv")) %>%
  mutate(team = as_factor(team))

nonbinary_mann = nonbinary %>%
  filter(team == "Mannheim") %>%
  select(nonbinary)
nonbinary_curr = nonbinary %>%
  filter(team == "Current") %>%
  select(nonbinary)

nonbin_uncoded = read_csv(here("data", "nonbin_uncoded.csv"))

nonbinary_solo = nonbin_uncoded %>%
  filter(author_team_id=="solo uncoded") 
nonbinary_team = nonbin_uncoded %>%
  filter(author_team_id=="uncoded team") 


team_men_mann = sum(gender$team=="Mannheim" & gender$author_team_id== "All man\nteam")
team_men_curr = sum(gender$team=="Current" & gender$author_team_id== "All man\nteam")
team_men_change = abs((team_men_curr- team_men_mann)/team_men_mann*100)


gender_plot = ggplot(gender, aes(y=author_team_id, fill = team)) +
  geom_bar(position ="dodge") +
  geom_text(stat='count', aes(label=..count..), vjust=-1, size = 4, position = position_dodge(width = 1)) +
  scale_fill_manual(values = c(apsrcolors2)) + 
  theme_minimal() +
  theme(legend.position = "none", legend.title = element_blank(), plot.title = element_text(hjust = 0.5), axis.text=element_text(size=11)) +
  labs(y = " ", x = "New submissions", title = "A) Author gender") +
  coord_flip() + 
  expand_limits(x = 1150)


race = read_csv(here("data", "race.csv")) %>%
  mutate(team = as_factor(team))

solo_poc_mann = sum(race$team == "Mannheim" & race$author_team_eth == "Solo,\nauthor\nof color")
solo_poc_curr = sum(race$team == "Current" & race$author_team_eth == "Solo,\nauthor\nof color")
solo_poc_perc = (solo_poc_curr-solo_poc_mann)/solo_poc_mann*100
team_1poc_mann = sum(race$team == "Mannheim" & race$author_team_eth == "Team,\n1+\nauthors\nof color")
team_1poc_curr = sum(race$team == "Current" & race$author_team_eth == "Team,\n1+\nauthors\nof color")
team_1poc_perc = (team_1poc_curr-team_1poc_mann)/team_1poc_mann*100


race_plot = ggplot(race, aes(y=author_team_eth, fill = team)) +
  geom_bar(position ="dodge") +
  geom_text(stat='count', aes(label=..count..), vjust=-1, size = 4, position = position_dodge(width = 1)) +
  scale_fill_manual(values = c(apsrcolors2)) + 
  theme_minimal() +
  theme(legend.position = "bottom", legend.title = element_blank(), axis.text.x = element_text(vjust = 1), plot.title = element_text(hjust = 0.5), axis.text=element_text(size=11)) +
  labs(y = " ", x = "New submissions", title = "B) Author race/ethnicity" , caption = paste0("Note: Mixed gender teams include ", nonbinary_team$nonbinary, " manuscripts with author(s) who identified as non-binary.\nJanuary 1, 2018-October 31, 2022, 29 months before and after editorial transition.", sep ="")) +
  coord_flip() + 
  expand_limits(x = 1150)

gender_race_plot = gender_plot + race_plot +
  plot_layout(ncol = 1, guides = "collect") & theme(legend.position = 'bottom')
  
ggsave(here("figures", "submissions_gender_race.tiff"), width = 6.5, height = 6.5, units = "in", device='tiff', dpi=300)


classification_inter = read_csv(here("data", "classification_inter.csv")) %>%
  mutate_all(as_factor)

lm1 = glm(GenderSex ~ author_team_gen_basic * team, data = classification_inter, binomial(link = "logit"))
lm2 = glm(Race ~ author_team_eth_basic * team, data = classification_inter, binomial(link = "logit"))
lm3 = glm(GlobalSouth ~ corr_author_region3 * team, data = classification_inter, binomial(link = "logit"))

lm1_ppr_95 = marginaleffects::avg_predictions(lm1, by = c("author_team_gen_basic", "team"))
lm2_ppr_95 = marginaleffects::avg_predictions(lm2, by = c("author_team_eth_basic", "team"))
lm3_ppr_95 = marginaleffects::avg_predictions(lm3, by = c("corr_author_region3", "team"))

lm1_ppr_84 = marginaleffects::avg_predictions(lm1, by = c("author_team_gen_basic", "team"), conf_level = 0.84)
lm2_ppr_84 = marginaleffects::avg_predictions(lm2, by = c("author_team_eth_basic", "team"), conf_level = 0.84)
lm3_ppr_84 = marginaleffects::avg_predictions(lm3, by = c("corr_author_region3", "team"), conf_level = 0.84)


ppr_95 = lm1_ppr_95 %>%
  bind_rows(lm2_ppr_95, lm3_ppr_95) %>%
  mutate(outcome = case_when(!is.na(author_team_gen_basic) ~ "Outcome:\nWomen, Gender & Sexuality", 
                             !is.na(author_team_eth_basic) ~ "Outcome:\nRace/Ethnicity", 
                             !is.na(corr_author_region3) ~ "Outcome:\nGlobal South"),
         outcome = as_factor(outcome),
         outcome = fct_relevel(outcome, "Outcome:\nGlobal South", "Outcome:\nWomen, Gender & Sexuality"),
         authors = NA_character_, 
         authors = if_else(!is.na(author_team_gen_basic), as.character(author_team_gen_basic), authors), 
         authors = if_else(!is.na(author_team_eth_basic), as.character(author_team_eth_basic), authors), 
         authors = if_else(!is.na(corr_author_region3), as.character(corr_author_region3), authors), 
         authors = as_factor(authors), 
         authors = fct_relevel(authors, "N. America", "Europe", "Other", "White\nauthor(s)", "1+\nAuthor(s)\nof color", "Men", "Women/\nNon-binary", "Mixed\nteam", "Declined")) %>%
  select(team, estimate, conf.low, conf.high, outcome, authors) %>%
  as_tibble()
           
ppr_84 = lm1_ppr_84 %>%
  bind_rows(lm2_ppr_84, lm3_ppr_84) %>%
  mutate(outcome = case_when(!is.na(author_team_gen_basic) ~ "Outcome:\nWomen, Gender & Sexuality", 
                             !is.na(author_team_eth_basic) ~ "Outcome:\nRace/Ethnicity", 
                             !is.na(corr_author_region3) ~ "Outcome:\nGlobal South"), 
         outcome = as_factor(outcome),
         outcome = fct_relevel(outcome, "Outcome:\nGlobal South", "Outcome:\nWomen, Gender & Sexuality"),
         authors = NA_character_, 
         authors = if_else(!is.na(author_team_gen_basic), as.character(author_team_gen_basic), authors), 
         authors = if_else(!is.na(author_team_eth_basic), as.character(author_team_eth_basic), authors), 
         authors = if_else(!is.na(corr_author_region3), as.character(corr_author_region3), authors), 
         authors = as_factor(authors), 
         authors = fct_relevel(authors, "N. America", "Europe", "Other", "White\nauthor(s)", "1+\nAuthor(s)\nof color", "Men", "Women/\nNon-binary", "Mixed\nteam", "Declined")) %>%
  select(team, estimate, conf.low, conf.high, outcome, authors) %>%
  as_tibble()


class_demo_results = ggplot() +
  geom_errorbar(data = ppr_84, aes(x=authors, ymin = conf.low, ymax = conf.high, color = team), width = .4, size = .8, position = position_dodge(width=0.9)) + 
  geom_linerange(data = ppr_95, aes(x=authors,ymin = conf.low, ymax = conf.high, color = team), position = position_dodge(width=0.9), linewidth = .8) + 
  geom_point(data = ppr_95, aes(x=authors, y = estimate, color = team, shape = team), position = position_dodge(width=0.9), size = 2) + 
  theme_minimal() +
  scale_color_manual(values = c(apsrcolors2)) + 
  scale_shape_manual(values = c(15, 16)) +
  theme(legend.position = "bottom", legend.title = element_blank()) + 
  labs(y = "Predicted Pr(Classification)", x = "Author characteristics", caption = "Note: Predicted probabilities w/84% & 95% confidence intervals from logistic regressions.\nJanuary 1, 2018-October 31, 2022, 29 months before and after editorial transition.") +
  facet_grid(~outcome,  scales = "free", space = "free")

ggsave(here("figures", "class_glm.tiff"), width = 6.5, height = 4, units = "in", device='tiff', dpi=300)


new_first = read_csv(here("data", "review_new_first.csv")) %>%
  mutate(team = as_factor(team))
new_first_labels = read_csv(here("data", "review_new_first_labels.csv"))%>%
  mutate(team = as_factor(team))

new_first_p = new_first %>%
  filter(!is.na(decision)) %>%
  ggplot(aes(x = days, y = reorder(team, desc(team)), fill = reorder(team, desc(team)))) +
  geom_density_ridges(na.rm=TRUE, scale = 1.5, bandwidth = 1.5) +
  geom_text(data = new_first_labels, aes(label=n, y=reorder(team, desc(team))), x = 20, vjust = -7.5, hjust =0) + 
  geom_text(data = new_first_labels, aes(label=median, y=reorder(team, desc(team))), x = 20, vjust = -6, hjust =0) + 
  scale_fill_manual(values = c(apsrcolors2_inv)) +
  facet_wrap(~decision) +
  scale_y_discrete(expand = c(0, 0)) + 
  scale_x_continuous(limits = c(-5, 50), breaks = c(0, 10, 20, 30, 40, 50)) +
  labs(x = " ", y = " ", ) +
  theme(legend.position = "none", strip.background = element_blank(), axis.text=element_text(size=11), strip.text.x = element_text(margin = margin(b = 8), size=12))


new_postreview_labels = read_csv(here("data", "review_new_postreview_labels.csv")) %>%
  mutate(team = as_factor(team))
new_postreview = read_csv(here("data", "review_new_postreview.csv")) %>%
  mutate(team = as_factor(team))

new_postrev_p = new_postreview %>%
  filter(!is.na(decision)) %>%
  ggplot(aes(x = days, y = reorder(team, desc(team)), fill = reorder(team, desc(team)))) +
  geom_density_ridges(na.rm=TRUE, scale = 1.2, bandwidth = 5) +
  geom_text(data = new_postreview_labels, aes(label=n, y=reorder(team, desc(team))), x = 120, vjust = -9.5, hjust =0) + 
  geom_text(data = new_postreview_labels, aes(label=median, y=reorder(team, desc(team))), x = 120, vjust = -8, hjust =0) + 
  scale_fill_manual(values = c(apsrcolors2_inv)) +
  facet_wrap(~decision) +
  scale_y_discrete(expand = c(0, 0)) + 
  scale_x_continuous(limits = c(30, 240), breaks = c(30, 60, 90, 120, 150, 180, 210, 240)) +
  labs(x = "Days since initial submission", y = " ", caption = "Note: January 1, 2018-October 31, 2022.") +
  theme(legend.position = "none", strip.background = element_blank(), axis.text=element_text(size=11), strip.text.x = element_text(margin = margin(b = 8), size=12))


review_turnaround = new_first_p + new_postrev_p + 
  plot_layout(ncol = 1, guides = "collect")
ggsave(here("figures", "review_turnaround.tiff"), width = 6.5, height = 7.25, units = "in", device='tiff', dpi=300)


rev_colors <- c("Mean invited reviews" = "black", "Mean completed reviews" = "dark grey", "Median days with reviewers (smoothed)" = "skyblue4")
sub_review = read_csv(here("data", "review_sub_review.csv"))
sub_new = read_csv(here("data", "review_sub_new.csv"))
review_mann = sub_review %>%
  filter(team == "Mannheim")
review_curr = sub_review %>%
  filter(team == "Current")


reviews_per = ggplot(sub_new, aes(x = year_month)) +
  geom_line(aes(y = mean_invited, color = "Mean invited reviews")) +
  geom_ribbon(aes(ymin = min_invited, ymax = max_invited), alpha = .35) + 
  geom_line(aes(y = mean_completed, color = "Mean completed reviews")) + 
  geom_ribbon(aes(ymin=min_completed, ymax = max_completed), alpha = .35) + 
  geom_smooth(aes(y = med_review/10, x=year_month, color = "Median days with reviewers (smoothed)"), formula = y ~ x, alpha = .3, method = "loess") +
  geom_vline(xintercept = as_date("2020-06-01"), linetype = "dotted") +
  scale_x_date(date_labels = "%m/%Y", date_breaks = "3 months", limits = as.Date(c("2018-01-01", "2022-11-01")), expand = c(0,0)) +
  theme(axis.text.x = element_text(angle = 40, hjust = 1), legend.position="bottom") +
  scale_y_continuous(limits = c(-0, 10), sec.axis = sec_axis( trans=~./10, labels = c(0,25,50,75,100), name="Median days w/reviewers (smoothed)")) +
  labs(x = "Month/Year", y = "Mean reviews invited & completed", color = "", caption = "Note: Includes only those manuscripts with decisions.\nJanuary 1, 2018-October 31, 2022, 29 months before and after editorial transition.") + 
  scale_color_manual(values = rev_colors) 
ggsave(here("figures", "reviewers.tiff"), width = 6, height = 4, units = "in", device='tiff', dpi=300)


revstats_byteam = read_csv(here("data", "review_revstats_byteam.csv")) %>%
  mutate(team = as_factor(team))


revstats_byteam_plot = revstats_byteam %>%
  ggplot(aes(x=name, y=`Proportion completed`, fill = team)) +
  geom_bar(stat= "identity", position ="dodge") +
  geom_errorbar(aes(ymin=min, ymax=max), width = .4, position=position_dodge(.9)) +
  scale_fill_manual(values = c(apsrcolors2)) + 
  theme_minimal() +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  labs(x = " ", caption = "Note: 95% confidence intervals and includes only those manuscripts with decisions between\nJanuary 1, 2018-October 31, 2022.") 
ggsave(here("figures", "reviewer_stats.tiff"), width = 6.5, height = 4, units = "in", device='tiff', dpi=300)


